require(tidyverse)
require(plotly)
require(knitr)
require(rstan)
require(reshape2)
require(bayesplot)
require(truncnorm)
require(DirichletReg)Modelo Binomial Negativa
Importanto as bibliotecas necessárias
Gerando os jogos
num_teams <- 20
# Dicionario para relacionar o id de um time com seu nome
team_names <- c("Dragões do Sertão", "Atlético Rio Vermelho", "Borborema",
"Guerreiros da Mata", "Cacique", "Aurora Litorânea",
"Gávea Azul", "Mandacaru United", "Capibaribe",
"Índios Tupiniquins", "Atlético Taquara Verde", "Seriema",
"Blumenau City", "Iguaçu", "Atlético Palmares",
"Serra Dourada", "Sambaqui", "Pampa",
"Riacho do Meio", "Sport Club Xingu")
games <- data.frame(
h = rep(1:num_teams, each = num_teams),
a = rep(1:num_teams, times = num_teams)
)
games <- games[games$h != games$a, ]Definindo os parâmetros dos dados gerados:
set.seed(28)
beta_0 <- -0.1
home_effect <- 0.35
sd_att <- 0.2
sd_def <- 0.2
pi_att <- c(0.45, 0.1, 0.45)
pi_def <- c(0.4, 0.2, 0.4)
mu_att <- c(-0.4, 0, 0.4)
mu_def <- c(0.5, 0, -0.5)
att_category <- sample(1:3, 20, replace = T, prob = pi_att)
def_category <- sample(1:3, 20, replace = T, prob = pi_def)
att_effects <- rnorm(num_teams, mu_att[att_category], sd_att)
def_effects <- rnorm(num_teams, mu_def[def_category], sd_def)Esses foram os efeitos de ataque e defesa gerados:
Gerando os resultados dos jogos
set.seed(40)
simulate_games <- function(games){
num_games <- length(games$h)
home_team <- games$h
away_team <- games$a
theta_1 <- beta_0 + home_effect + att_effects[home_team] + def_effects[away_team]
theta_2 <- beta_0 + att_effects[away_team] + def_effects[home_team]
y1 <- rpois(num_games, exp(theta_1))
y2 <- rpois(num_games, exp(theta_2))
games$y1 <- y1
games$y2 <- y2
return(games)
}
games <- simulate_games(games)
num_games <- nrow(games)Estimando os parâmetros com o STAN
data <- c(G = num_games, T = num_teams, as.list(games))
model <- stan_model("./model/poisson_mistura_simple.stan")
iter <- 10000
fit <- sampling(model, data = data, iter = iter, chains = 2, cores = 2)Warning: There were 11 divergent transitions after warmup. See
https://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
to find out why this is a problem and how to eliminate them.
Warning: There were 231 transitions after warmup that exceeded the maximum treedepth. Increase max_treedepth above 10. See
https://mc-stan.org/misc/warnings.html#maximum-treedepth-exceeded
Warning: There were 1 chains where the estimated Bayesian Fraction of Missing Information was low. See
https://mc-stan.org/misc/warnings.html#bfmi-low
Warning: Examine the pairs() plot to diagnose sampling problems
Warning: The largest R-hat is 1.62, indicating chains have not mixed.
Running the chains for more iterations may help. See
https://mc-stan.org/misc/warnings.html#r-hat
Warning: Bulk Effective Samples Size (ESS) is too low, indicating posterior means and medians may be unreliable.
Running the chains for more iterations may help. See
https://mc-stan.org/misc/warnings.html#bulk-ess
Warning: Tail Effective Samples Size (ESS) is too low, indicating posterior variances and tail quantiles may be unreliable.
Running the chains for more iterations may help. See
https://mc-stan.org/misc/warnings.html#tail-ess
data {
int<lower=1> G; // Numero total de jogos
int<lower=1> T; // Numero de total times
int<lower=0, upper=T> h[G]; // Indice do time que joga em casa no G-esimo jogo
int<lower=0, upper=T> a[G]; // Indice do time que joga como visitante no G-esimo jogo
int<lower=0> y1[G]; // Numero de gols marcados no G-esimo jogo pelo time que joga em casa
int<lower=0> y2[G]; // Numero de gols marcados no G-esimo jogo pelo time que joga como visitante
}
parameters {
real home;
real mu;
real<lower=-3, upper=-0.31> mu_att1;
real<lower=0.31, upper=3> mu_att3;
real<lower=0.31, upper=3> mu_def1;
real<lower=-3, upper=-0.31> mu_def3;
real<lower=0> sigma_att[3];
real<lower=0> sigma_def[3];
vector[T] att;
vector[T] def;
simplex[3] pi_att;
simplex[3] pi_def;
}
// transformed parameters {
// vector[T] att;
// vector[T] def;
//
// for (t in 1:(T-1)) {
// att[t] = att_raw[t];
// def[t] = def_raw[t];
// }
// att[T] = 0;
// def[T] = 0;
// }
model {
real m_att[3];
real m_def[3];
mu ~ normal(0, 10);
home ~ normal(0, 10);
mu_att1 ~ normal(0, 10) T[-3,-0.31];
mu_att3 ~ normal(0, 10) T[0.31,3];
mu_def1 ~ normal(0, 10) T[0.31,3];
mu_def3 ~ normal(0, 10) T[-3,-0.31];
sigma_att ~ cauchy(0, 0.5);
sigma_def ~ cauchy(0, 0.5);
pi_att ~ dirichlet(rep_vector(1, 3));
pi_def ~ dirichlet(rep_vector(1, 3));
for (t in 1:T) {
m_att[1] = log(pi_att[1]) + normal_lpdf(att[t] | mu_att1, sigma_att[1]);
m_att[2] = log(pi_att[2]) + normal_lpdf(att[t] | 0, 0.01);
m_att[3] = log(pi_att[3]) + normal_lpdf(att[t] | mu_att3, sigma_att[3]);
m_def[1] = log(pi_def[1]) + normal_lpdf(def[t] | mu_def1, sigma_def[1]);
m_def[2] = log(pi_def[2]) + normal_lpdf(def[t] | 0, 0.01);
m_def[3] = log(pi_def[3]) + normal_lpdf(def[t] | mu_def3, sigma_def[3]);
target += log_sum_exp(m_att) + log_sum_exp(m_def);
}
for (g in 1:G) {
y1[g] ~ poisson_log(mu + home + att[h[g]] + def[a[g]]);
y2[g] ~ poisson_log(mu + att[a[g]] + def[h[g]]);
}
}
generated quantities {
vector[G] y1_tilde;
vector[G] y2_tilde;
vector[G] log_lik;
for (g in 1:G) {
y1_tilde[g] = poisson_log_rng(mu + home + att[h[g]] + def[a[g]]);
y2_tilde[g] = poisson_log_rng(mu + att[a[g]] + def[h[g]]);
log_lik[g] = poisson_log_lpmf(y1[g] | mu + home + att[h[g]] + def[a[g]]) + poisson_log_lpmf(y2[g] | mu + att[a[g]] + def[h[g]]);
}
}Analisando possíveis problemas
Traceplot
Visualizando os valores estimados para 100 campeonatos
